set.seed(2021)
Load scripts and packages.
library(SingleCellMultiModal)
library(MultiAssayExperiment)
library(UpSetR)
source("../scripts/initialise.R")
Load data using SingleCellMultiModal Bioconductor package.
mae <- scMultiome("pbmc_10x", mode = "*", dry.run = FALSE, format = "MTX")
Perform some exploration of this data.
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 65, c = 100)[1:n]
}
colors <- gg_color_hue(length(unique(mae$celltype)))
names(colors) <- unique(mae$celltype)
mae
## A MultiAssayExperiment object of 2 listed
## experiments with user-defined names and respective classes.
## Containing an ExperimentList class object of length 2:
## [1] atac: SingleCellExperiment with 108344 rows and 10032 columns
## [2] rna: SingleCellExperiment with 36549 rows and 10032 columns
## Functionality:
## experiments() - obtain the ExperimentList instance
## colData() - the primary/phenotype DataFrame
## sampleMap() - the sample coordination DataFrame
## `$`, `[`, `[[` - extract colData columns, subset, or experiment
## *Format() - convert into a long or wide DataFrame
## assays() - convert ExperimentList to a SimpleList of matrices
## exportClass() - save all data to files
upsetSamples(mae)
head(colData(mae))
## DataFrame with 6 rows and 6 columns
## nCount_RNA nFeature_RNA nCount_ATAC nFeature_ATAC
## <integer> <integer> <integer> <integer>
## AAACAGCCAAGGAATC 8380 3308 55582 13878
## AAACAGCCAATCCCTT 3771 1896 20495 7253
## AAACAGCCAATGCGCT 6876 2904 16674 6528
## AAACAGCCAGTAGGTG 7614 3061 39454 11633
## AAACAGCCAGTTTACG 3633 1691 20523 7245
## AAACAGCCATCCAGGT 7782 3028 22412 8602
## celltype broad_celltype
## <character> <character>
## AAACAGCCAAGGAATC naive CD4 T cells Lymphoid
## AAACAGCCAATCCCTT memory CD4 T cells Lymphoid
## AAACAGCCAATGCGCT naive CD4 T cells Lymphoid
## AAACAGCCAGTAGGTG naive CD4 T cells Lymphoid
## AAACAGCCAGTTTACG memory CD4 T cells Lymphoid
## AAACAGCCATCCAGGT non-classical monocy.. Myeloid
dim(experiments(mae)[["rna"]])
## [1] 36549 10032
names(experiments(mae))
## [1] "atac" "rna"
sce.rna <- experiments(mae)[["rna"]]
Normalise and select features for RNA modality. Perform PCA and visualise the RNA modality.
sce.rna <- logNormCounts(sce.rna)
decomp <- modelGeneVar(sce.rna)
hvgs <- rownames(decomp)[decomp$mean>0.01 & decomp$p.value <= 0.05]
sce.rna <- sce.rna[hvgs,]
# PCA
sce.rna <- scater::runPCA(sce.rna, ncomponents = 25)
# UMAP
sce.rna <- scater::runUMAP(sce.rna, dimred="PCA", n_neighbors = 25, min_dist = 0.3)
plotUMAP(sce.rna, colour_by="celltype", point_size=0.5, point_alpha=1)
Normalise and select features for ATAC modality. Perform PCA and visualise the ATAC modality. Note these cells are the same as the RNA modality.
dim(experiments(mae)[["atac"]])
## [1] 108344 10032
sce.atac <- experiments(mae)[["atac"]]
sce.atac <- logNormCounts(sce.atac)
decomp <- modelGeneVar(sce.atac)
hvgs <- rownames(decomp)[decomp$mean>0.25 & decomp$p.value <= 0.05]
sce.atac <- sce.atac[hvgs,]
sce.atac <- scater::runPCA(sce.atac, ncomponents = 25)
# UMAP
sce.atac <- scater::runUMAP(sce.atac, dimred="PCA", n_neighbors = 25, min_dist = 0.3)
plotUMAP(sce.atac, colour_by="celltype", point_size=0.5, point_alpha=1)
Split the cells into three groups with ratios provided below, first for RNA modality, second for multiome modality, and third for ATAC modality. Using this simulation, perform StabMap. Note that other techniques such as PCA, UINMF, or MultiMAP require some further matching of features between the RNA and ATAC modalities to be able to run.
Assess the quality of the mapping visually.
gList = list()
for (probval in c(1:10)) {
print(probval)
modality = sample(c("RNA", "Multiome", "ATAC"), size = ncol(sce.rna),
replace = TRUE,
prob = c(probval,1,probval))
names(modality) <- colnames(sce.rna)
table(modality)
# multiome RNA
M_R = assay(sce.rna, "logcounts")[,modality == "Multiome"]
# multiome ATAC
M_A = assay(sce.atac, "logcounts")[,modality == "Multiome"]
# RNA
R = assay(sce.rna, "logcounts")[,modality == "RNA"]
# ATAC
A = assay(sce.atac, "logcounts")[,modality == "ATAC"]
# they dont have any overlapping features
Reduce(intersect, list(c(rownames(M_R), rownames(M_A)), rownames(R), rownames(A)))
# there should be no overlapping colnames between the data,
# but colnames should be identical between the two multiome sub matrices
identical(colnames(M_R), colnames(M_A))
intersect(colnames(M_R), colnames(R))
intersect(colnames(M_R), colnames(A))
intersect(colnames(R), colnames(A))
assay_list = list(RNA = R,
ATAC = A,
Multiome = rbind(M_R, M_A))
out_joint = stabMapGeneralised(assay_list,
reference_list = c("Multiome"),
projectAll = TRUE,
plot = FALSE)
# out_joint_UMAP = calculateUMAP_rnames(out_joint)
out_joint_corrected = reducedMNN_batchFactor(
as.matrix(out_joint), batchFactor = modality[rownames(out_joint)])
out_joint_UMAP_corrected = calculateUMAP_rnames(out_joint_corrected)
out_joint_df = data.frame(cell = names(modality),
modality = modality,
# UMAP1 = out_joint_UMAP[names(modality),1],
# UMAP2 = out_joint_UMAP[names(modality),2],
celltype = colData(sce.rna)[names(modality),"celltype"],
UMAP1_corrected = out_joint_UMAP_corrected[names(modality),1],
UMAP2_corrected = out_joint_UMAP_corrected[names(modality),2])
out_joint_df <- out_joint_df[sample(rownames(out_joint_df)),]
g = ggplot(out_joint_df, aes(x = modality)) +
geom_bar(aes(fill = modality)) +
ylab("Number of cells") +
theme_classic() +
ggplot(out_joint_df, aes(x = UMAP1_corrected, y = UMAP2_corrected)) +
geom_point(aes(colour = modality)) +
theme_classic() +
ggplot(out_joint_df, aes(x = UMAP1_corrected, y = UMAP2_corrected)) +
geom_point(aes(colour = celltype)) +
theme_classic() +
ggplot(out_joint_df, aes(x = UMAP1_corrected, y = UMAP2_corrected)) +
geom_point(aes(colour = celltype)) +
facet_wrap(~modality) +
theme_classic() +
plot_layout(design = c("
123
444
")) +
NULL
gList[[probval]] <- g
print(g)
}
## [1] 1
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] 2
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] 3
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] 4
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] 5
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] 6
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] 7
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] 8
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] 9
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] 10
## [1] "fitting linear models"
## [1] "fitted linear models"
## [1] "fitting linear models"
## [1] "fitted linear models"
Finish
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] igraph_1.2.6 batchelor_1.8.0
## [3] patchwork_1.1.1 scran_1.20.0
## [5] scater_1.20.0 ggplot2_3.3.3
## [7] scuttle_1.2.0 SingleCellExperiment_1.14.1
## [9] UpSetR_1.4.0 SingleCellMultiModal_1.4.0
## [11] MultiAssayExperiment_1.18.0 SummarizedExperiment_1.22.0
## [13] Biobase_2.52.0 GenomicRanges_1.44.0
## [15] GenomeInfoDb_1.28.0 IRanges_2.26.0
## [17] S4Vectors_0.30.0 BiocGenerics_0.38.0
## [19] MatrixGenerics_1.4.0 matrixStats_0.58.0
##
## loaded via a namespace (and not attached):
## [1] AnnotationHub_3.0.0 BiocFileCache_2.0.0
## [3] plyr_1.8.6 BiocParallel_1.26.0
## [5] digest_0.6.27 htmltools_0.5.1.1
## [7] viridis_0.6.1 magick_2.7.2
## [9] fansi_0.4.2 magrittr_2.0.1
## [11] memoise_2.0.0 ScaledMatrix_1.0.0
## [13] SpatialExperiment_1.2.0 cluster_2.1.2
## [15] limma_3.48.0 Biostrings_2.60.0
## [17] R.utils_2.10.1 colorspace_2.0-1
## [19] blob_1.2.1 rappdirs_0.3.3
## [21] xfun_0.23 dplyr_1.0.6
## [23] crayon_1.4.1 RCurl_1.98-1.3
## [25] jsonlite_1.7.2 glue_1.4.2
## [27] gtable_0.3.0 zlibbioc_1.38.0
## [29] XVector_0.32.0 DelayedArray_0.18.0
## [31] BiocSingular_1.8.0 DropletUtils_1.12.0
## [33] Rhdf5lib_1.14.0 HDF5Array_1.20.0
## [35] scales_1.1.1 DBI_1.1.1
## [37] edgeR_3.34.0 Rcpp_1.0.6
## [39] viridisLite_0.4.0 xtable_1.8-4
## [41] dqrng_0.3.0 bit_4.0.4
## [43] rsvd_1.0.5 ResidualMatrix_1.2.0
## [45] metapod_1.0.0 httr_1.4.2
## [47] ellipsis_0.3.2 pkgconfig_2.0.3
## [49] R.methodsS3_1.8.1 farver_2.1.0
## [51] sass_0.4.0 uwot_0.1.10
## [53] dbplyr_2.1.1 locfit_1.5-9.4
## [55] utf8_1.2.1 tidyselect_1.1.1
## [57] labeling_0.4.2 rlang_0.4.11
## [59] later_1.2.0 AnnotationDbi_1.54.0
## [61] munsell_0.5.0 BiocVersion_3.13.1
## [63] tools_4.1.0 cachem_1.0.5
## [65] generics_0.1.0 RSQLite_2.2.7
## [67] ExperimentHub_2.0.0 evaluate_0.14
## [69] stringr_1.4.0 fastmap_1.1.0
## [71] yaml_2.2.1 knitr_1.33
## [73] bit64_4.0.5 purrr_0.3.4
## [75] KEGGREST_1.32.0 sparseMatrixStats_1.4.0
## [77] mime_0.10 R.oo_1.24.0
## [79] compiler_4.1.0 beeswarm_0.3.1
## [81] filelock_1.0.2 curl_4.3.1
## [83] png_0.1-7 interactiveDisplayBase_1.30.0
## [85] tibble_3.1.2 statmod_1.4.36
## [87] bslib_0.2.5.1 stringi_1.6.2
## [89] highr_0.9 RSpectra_0.16-0
## [91] lattice_0.20-44 bluster_1.2.0
## [93] Matrix_1.3-3 vctrs_0.3.8
## [95] pillar_1.6.1 lifecycle_1.0.0
## [97] rhdf5filters_1.4.0 BiocManager_1.30.15
## [99] jquerylib_0.1.4 RcppAnnoy_0.0.18
## [101] BiocNeighbors_1.10.0 cowplot_1.1.1
## [103] bitops_1.0-7 irlba_2.3.3
## [105] httpuv_1.6.1 R6_2.5.0
## [107] promises_1.2.0.1 gridExtra_2.3
## [109] vipor_0.4.5 codetools_0.2-18
## [111] assertthat_0.2.1 rhdf5_2.36.0
## [113] rjson_0.2.20 withr_2.4.2
## [115] GenomeInfoDbData_1.2.6 grid_4.1.0
## [117] beachmat_2.8.0 rmarkdown_2.8
## [119] DelayedMatrixStats_1.14.0 shiny_1.6.0
## [121] ggbeeswarm_0.6.0